import requests
from lxml import etree

"""
    发送请求==调用接口
"""


class GuShiSpider:
    """Scrape Tang poems from gushiwen.cn and append them to ``gushi.txt``.

    Flow: ``start`` fetches the listing page, ``parse_content`` walks every
    poem link on it, ``parse_detail_content`` extracts one poem's text, and
    ``save_content`` appends it to the output file.
    """

    def __init__(self):
        # Listing page of Tang poems; every poem link is scraped from here.
        self.url = "https://www.gushiwen.cn/gushi/tangshi.aspx"
        # Browser-like UA so the site serves the normal HTML page.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
        }

    def send_request(self, url, timeout=10):
        """GET *url* with the spider's headers and return the raw response.

        Args:
            url: Absolute URL to fetch.
            timeout: Seconds before the request aborts.  Previously no
                timeout was set, so a stalled server could hang the spider
                forever; the default keeps existing call sites working.

        Returns:
            The ``requests.Response`` (status code is NOT checked here —
            callers inspect ``status_code`` themselves).
        """
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response

    def parse_content(self, response):
        """Parse the listing page and fetch + save every linked poem."""
        html = etree.HTML(response.text)
        # Poem links on the listing page sit inside <span> elements.
        a_list = html.xpath('//span/a')
        for a in a_list:
            name = "".join(a.xpath("./text()"))
            href = "".join(a.xpath("./@href"))
            # hrefs are site-relative, so prefix the scheme + host.
            full_href = "https://www.gushiwen.cn" + href
            response_detail = self.send_request(full_href)
            if response_detail.status_code == 200:
                self.parse_detail_content(response_detail, name)

    def parse_detail_content(self, response_detail, name):
        """Extract the poem body from a detail page and persist it.

        Args:
            response_detail: Response for one poem's detail page.
            name: Poem title taken from the listing-page link text.
        """
        html = etree.HTML(response_detail.text)
        # The poem text lives inside <div class="contson">; join all
        # descendant text nodes into one string.
        content = "".join(html.xpath('//div[@class="contson"]//text()'))
        self.save_content(name, content)

    def save_content(self, name, content):
        """Append one poem (title, body, separator line) to ``gushi.txt``."""
        print("正在保存{}".format(name))
        with open("gushi.txt", "a", encoding="utf8") as f:
            f.write(name)
            f.write("\n")
            f.write(content)
            # Previously the separator was written directly after the body,
            # so it fused onto the poem's last line whenever the scraped
            # text had no trailing newline.
            if not content.endswith("\n"):
                f.write("\n")
            f.write("-------------------------------------------------\n")

    def start(self):
        """Entry point: fetch the listing page; process it only on HTTP 200."""
        response = self.send_request(self.url)
        if response.status_code == 200:
            self.parse_content(response)


if __name__ == '__main__':
    # Script entry point: build the spider and kick off the crawl.
    spider = GuShiSpider()
    spider.start()
